UK Gilts yield curve modelling

Sources

Data cleansing

We begin by limiting the DMO data to conventional gilts only and sanitising the UTF-8 encoding (the £ symbol and fraction characters in Gilt Names). Also be wary of Excel/Python interpreting 2-digit years according to the POSIX and ISO C standards: values 69–99 map to 1969–1999 and values 0–68 map to 2000–2068 (https://docs.python.org/3/library/time.html) — so explicitly set all years to 4 digits, especially for far-dated maturities, e.g. 50y.

Then compute Time to Maturity in days and years as time between Redemption date and Settlement date:

In [1]:
# Imports and raw DMO data load.
from pandas.io.excel import read_excel
import datetime
import pandas as pd
import numpy as np
from scipy import optimize

def dateparse(x):
    """Parse DMO dd/mm/YYYY date strings.

    Explicit 4-digit years avoid the POSIX 2-digit-year pivoting (69-99 ->
    19xx, 0-68 -> 20xx) described in the notes above.  Uses the stdlib
    `datetime` module: `pd.datetime` is deprecated/removed in pandas.
    """
    return datetime.datetime.strptime(x, '%d/%m/%Y')

DMOdata = pd.read_csv(r'data/GILTS2012ToPresent.csv',
                      parse_dates=['Close of Business Date', 'Redemption Date'],
                      date_parser=dateparse)

# Index by close-of-business date and derive maturity / coupon columns.
dmo_ix = DMOdata.set_index('Close of Business Date')

# Vectorised settlement-to-redemption gap in whole days; replaces the
# original per-row Python loop (one pandas->python conversion per row).
dmo_ix['DaysToMaturity'] = (
    pd.DatetimeIndex(dmo_ix['Redemption Date']) - pd.DatetimeIndex(dmo_ix.index)
).days

dmo_ix['YearsToMaturity'] = dmo_ix['DaysToMaturity'] / 365  # ACT/365 simplification
# Coupon is the leading number in the gilt name, e.g. "4.25% Treasury Gilt 2032".
dmo_ix['Coupon'] = dmo_ix['Gilt Name'].apply(lambda x: float(x.split('%')[0]))
In [3]:
## Drop dates with suspect data.  Drop the index label directly: the
## original passed a whole DataFrame (via the removed `.ix` accessor) to
## drop(), which is not the supported calling convention.
dmo_ix.drop(pd.Timestamp('2013-04-03'), inplace=True)
# '2014-03-05' also looks anomalous but is deliberately kept for now.

Nelson Siegel Svensson calibration

Yield curve data is calibrated to the NSS model and yield surfaces are generated for raw data, fitted models and residuals

In [4]:
def NSS_optfit(T, beta_0, beta_1, beta_2, beta_3, tau_1, tau_2):
    """Nelson-Siegel-Svensson yield as a function of time to maturity.

    Parameters
    ----------
    T : array_like
        Time to maturity in years.  Must be non-zero: the NSS loadings
        (1 - exp(-T/tau)) / (T/tau) are undefined at T == 0.
    beta_0, beta_1, beta_2, beta_3 : float
        Level, slope, and the two curvature ("hump") coefficients.
    tau_1, tau_2 : float
        Decay parameters locating the two humps.

    Returns
    -------
    Fitted yield(s), same shape as `T`.
    """
    # Hoist the shared exponential loadings so each is computed only once.
    x1 = T / tau_1
    x2 = T / tau_2
    decay1 = np.exp(-x1)
    decay2 = np.exp(-x2)
    load1 = (1 - decay1) / x1
    load2 = (1 - decay2) / x2

    short = beta_1 * load1
    hump1 = beta_2 * (load1 - decay1)
    hump2 = beta_3 * (load2 - decay2)
    return beta_0 + short + hump1 + hump2
In [5]:
# Calibrate NSS parameters date-by-date; collect raw, fitted and residual
# curves for the surface plots below.
NSSparams = {}
NSSmodel = []
yieldcurves = []
residuals = []
for idx in dmo_ix.index.unique():
    # One yield curve per close-of-business date.  A pivot failure or a
    # non-converging fit is reported and the date skipped: the original bare
    # `except:` fell through and crashed on the then-undefined `data`/`popt`.
    try:
        data = dmo_ix.loc[idx].pivot(values='Yield (%)', columns='YearsToMaturity')
    except Exception as err:
        print('data yield curve pivot problem on this date', idx, err)
        continue
    xdata, ydata = data.columns.values, data.values[0]
    try:
        # NOTE: `from scipy import optimize` binds only `optimize`; the
        # original `scipy.optimize.curve_fit` raised NameError.
        popt, pcov = optimize.curve_fit(NSS_optfit, xdata, ydata, maxfev=10000)
    except Exception as err:
        print('curve_fitting maxfev problem on this date', idx, err)
        continue
    y = NSS_optfit(xdata, *popt)
    NSSparams[idx] = popt
    yieldcurves.append(data)
    NSSmodel.append(y)
    residuals.append(y - data.values[0])
In [6]:
# Plotly offline/notebook setup (requires the plotly package in the notebook
# environment; the star import provides Scene/XAxis/YAxis/ZAxis used below).
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
import plotly.graph_objs as go
from plotly.graph_objs import *
init_notebook_mode()  # injects plotly.js so iplot works without a server
In [7]:
# Plotly Surface inputs: z is a 2-D grid (one row per COB date), x the
# maturity grids, y the dates.  The date/maturity axes are shared by all
# four surfaces, so build them once.
surface_dates = [pd.to_datetime(i.index.values[0]) for i in yieldcurves]
surface_maturities = [i.columns.values for i in yieldcurves]

raw_yld_data = [
    go.Surface(
        z=[i.values[0] for i in yieldcurves],
        x=surface_maturities,
        y=surface_dates
    )
]

dataNSS = [
    go.Surface(
        z=NSSmodel,
        x=surface_maturities,
        y=surface_dates
    )
]

residuals_data = [
    go.Surface(
        z=residuals,
        x=surface_maturities,
        y=surface_dates
    )
]

NSSparams_data = [
    go.Surface(
        # BUG FIX: z must be 2-D array-like, not a dict.  Take the fitted
        # parameter vectors in date order; dict insertion order (Python 3.7+)
        # matches the calibration loop, hence the yieldcurves/date order.
        z=list(NSSparams.values()),
        x=['beta_0', 'beta_1', 'beta_2', 'beta_3', 'tau_1', 'tau_2'],
        y=surface_dates
    )
]

layout = go.Layout(
    title='Yields surface',
    width=1024,
    height=768,
    scene=Scene(
        xaxis=XAxis(title='Maturity (years)'),
        yaxis=YAxis(title='Close of Business date'),
        zaxis=ZAxis(title='Yield (%)')
    )
)
Raw yield data surface
In [8]:
# Render the raw (uncalibrated) yield surface.
fig = go.Figure(data=raw_yld_data, layout=layout)
iplot(fig)
Fitted NSS yield surface
In [9]:
# Render the fitted NSS model surface (same layout/axes as the raw surface).
fig_NSS = go.Figure(data=dataNSS, layout=layout)
iplot(fig_NSS)
Residuals yield surface
In [10]:
# Render the fit residuals (raw minus model) as a surface.
fig_residuals = go.Figure(data=residuals_data, layout=layout)
iplot(fig_residuals)

The yield and residual surfaces above indicate data anomalies, notably for 2014-03-05, which exhibits a prominent hump. We suggest winsorizing the distribution to eliminate such outliers.

BOE nominal yield curve data

We briefly compare BOE nominal yield curves, fitted using the VRP (variable roughness penalty) methodology, against our NSS model fit. Chiefly, we are interested in the difference between forward and spot yields to estimate carry. We test an arbitrary date (2015-06-29) to demonstrate this:

In [40]:
# Load the BOE nominal yield curve workbooks.  The sheet-index -> series
# mapping mirrors the original code (4 = short-end forwards, 5 = forwards,
# 6 = short-end spots, 8 = spots) -- verify against the workbook tabs if the
# BOE changes the layout.
from pandas.io.excel import read_excel

urls = [
    r'data\uknom05_mdaily.xls', #2005 to 2015 only
    #r'data\uknom16_mdaily.xlsx' #2016 to present 
    ]


def _read_boe_sheets(files, sheet):
    """Concatenate one sheet index across all listed workbooks."""
    # `sheet_name=` replaces the deprecated `sheetname=` kwarg; pd.concat
    # replaces the deprecated DataFrame.append accumulation loop.
    return pd.concat([read_excel(f, sheet_name=sheet) for f in files])

spot_curve = _read_boe_sheets(urls, 8)
short_end_spot_curve = _read_boe_sheets(urls, 6)
fwd_spot_curve = _read_boe_sheets(urls, 5)
fwd_short_end_spot_curve = _read_boe_sheets(urls, 4)


def _tidy_boe_curve(curve):
    """Promote the 'years:' row to column labels and drop the header rows.

    The BOE sheets carry metadata rows above the data; the row labelled
    'years:' holds the maturity grid, and the first 4 index entries are
    header rows (assumption carried over from the original code -- confirm
    against the workbook if the layout changes).
    """
    curve.columns = curve.loc['years:']
    curve.columns.name = 'years'
    return curve.loc[curve.index[4:]]

# The same tidy-up was previously copy-pasted four times.
spot_curve = _tidy_boe_curve(spot_curve)
fwd_spot_curve = _tidy_boe_curve(fwd_spot_curve)
short_end_spot_curve = _tidy_boe_curve(short_end_spot_curve)
fwd_short_end_spot_curve = _tidy_boe_curve(fwd_short_end_spot_curve)


# Merge the short-end and main curves on the date index; sort the columns
# (maturities) ascending so the two maturity grids interleave correctly.
combined_fwd_data = pd.concat([fwd_short_end_spot_curve, fwd_spot_curve], axis=1, join='outer')

combined_fwd_data.sort_index(axis=1, inplace=True)

combined_data = pd.concat([short_end_spot_curve, spot_curve], axis=1, join='outer')
combined_data.sort_index(axis=1, inplace=True)



def filter_func(group):
    # Keep dates with at most 50 missing maturities.
    # NOTE(review): groupby(...).filter documents a scalar bool return, but
    # this returns a boolean Series; it appears to rely on each level-0
    # (date) group being a single row -- confirm on the pandas version used.
    return group.isnull().sum(axis=1) <= 50

combined_data = combined_data.groupby(level=0).filter(filter_func)
combined_fwd_data = combined_fwd_data.groupby(level=0).filter(filter_func)


from scipy.interpolate import interp1d

# Monthly maturity grid out to 25 years: 1/12, 2/12, ..., 25.0.
maturity = pd.Series((np.arange(12 * 25) + 1) / 12)
by_day = combined_data.groupby(level=0)
fwd_by_day = combined_fwd_data.groupby(level=0)

functDict = {} ## holds interpolated functions of the forward curve for each date so we can use it to estimate carry later on

##as BOE model has already been estimated by VRP we just do a simple interpolation rather than NSS
def interpolate_maturities(group, build_funct_dict=False):
    """Cubic-interpolate one date's curve onto the monthly maturity grid.

    Parameters
    ----------
    group : DataFrame
        A single-date group; its transpose yields (maturity, yield) pairs
        after dropping missing maturities.
    build_funct_dict : bool
        When True, also cache the interpolator in `functDict`, keyed by the
        group's date, for the later carry estimation.
    """
    pairs = group.T.dropna().reset_index()
    f = interp1d(pairs.iloc[:, 0], pairs.iloc[:, 1], kind='cubic',
                 bounds_error=False, assume_sorted=True)
    if build_funct_dict:
        functDict[group.index.values[0]] = f
    # interp1d is vectorised: evaluate the whole grid in one call instead of
    # the original elementwise Series.apply (also yields a float dtype
    # result rather than object dtype).
    return pd.Series(f(maturity.values), index=maturity.values)

cleaned_fwd_spot_curve = fwd_by_day.apply(interpolate_maturities, build_funct_dict=True)
cleaned_spot_curve = by_day.apply(interpolate_maturities)
In [ ]:
##Utility to structure a duration-neutral butterfly portfolio.
##Solves a 2x2 linear system so the near/far wings jointly match the
##medium ("body") leg's modified duration and dirty price per unit position.

def butterfly_position_solver(near, medium, far):  #return proportions of near and far maturities per unit medium position
    ## Wing coefficients: row 1 matches duration, row 2 matches cash outlay.
    near_dur = float(near['Modified Duration'])
    far_dur = float(far['Modified Duration'])
    near_px = float(near['Dirty Price'])
    far_px = float(far['Dirty Price'])
    coeffs = np.array([[near_dur, far_dur],
                       [near_px, far_px]])
    ## Targets: the body leg's duration and price.
    targets = np.array([float(medium['Modified Duration']),
                        float(medium['Dirty Price'])])
    return np.linalg.solve(coeffs, targets)